#################################################################################################
# DOES THE CROWD PROMISE TO FUND MORE INNOVATION?
# REPLICATION FILES  
# APPENDIX TABLES
#################################################################################################

#install.packages("texreg")
library(texreg)
library(plm)
library(stargazer)
library(ggplot2)
library(car)

#specify log function (to avoid infinity and NA errors in R)
log <- function(x) ifelse(x <= 0, 0, base::log(x))

###############################################
# TABLE S1: DESCRIPTIVE STATISTICS - COUNTY
###############################################
fe <- read.csv("yearly_fe_final.csv", header=TRUE, sep=",")

cs<-aggregate(fe,by=list(fe$fips),FUN=mean, na.rm=TRUE)
summary(cs)  # summary stats
sum(fe$amount) #total amount raised on KS
sum(fe$num_campaigns) # num. KS campaigns
sum(fe$num_successful) # num. successful KS campaigns
sum(fe$vc_count) # VC count
sum(fe$num_patents) # num. patents
sum(fe$num_citations) # num. citations

max(fe$amount) #total amount raised on KS
max(fe$num_campaigns) # num. KS campaigns
max(fe$num_successful) # num. successful KS campaigns
max(fe$vc_count) # VC count
max(fe$num_patents) # num. patents
max(fe$num_citations) # num. citations

fe <- read.csv("gini.csv", header=TRUE, sep=",")
fe_09 = subset(fe, year==2009)
fe_10 = subset(fe, year==2010)
fe_11 = subset(fe, year==2011)
fe_12 = subset(fe, year==2012)
fe_13 = subset(fe, year==2013)
fe_14 = subset(fe, year==2014)
fe_15 = subset(fe, year==2015)

mean_vc_pc = (fe_09$vc_count+fe_10$vc_count+fe_11$vc_count+fe_12$vc_count+fe_13$vc_count+fe_14$vc_count+fe_15$vc_count)/(fe_09$estimates_base_2010+fe_10$pop_estimate_2010+fe_11$pop_estimate_2011+fe_12$pop_estimate_2012+fe_13$pop_estimate_2013+fe_14$pop_estimate_2014+fe_15$pop_estimate_2015)
log_vc_pc = log(mean_vc_pc)
summary(log_vc_pc, na.omit=T)
sd(!is.na(log_vc_pc))

mean_pop =(fe_09$estimates_base_2010+fe_10$pop_estimate_2010+fe_11$pop_estimate_2011+fe_12$pop_estimate_2012+fe_13$pop_estimate_2013+fe_14$pop_estimate_2014+fe_15$pop_estimate_2015)/7
log_pop = log(mean_pop)
summary(log_pop, na.omit=T)
sd(!is.na(log_pop))

mean_ks_pc = (fe_09$num_campaigns +fe_10$num_campaigns +fe_11$num_campaigns +fe_12$num_campaigns +fe_13$num_campaigns +fe_14$num_campaigns +fe_15$num_campaigns)/(fe_09$estimates_base_2010+fe_10$pop_estimate_2010+fe_11$pop_estimate_2011+fe_12$pop_estimate_2012+fe_13$pop_estimate_2013+fe_14$pop_estimate_2014+fe_15$pop_estimate_2015)
log_ks_pc = log(mean_ks_pc)
summary(log_ks_pc, na.omit=T)
sd(!is.na(log_ks_pc))

###############################################
# TABLE S2: DESCRIPTIVE STATISTICS - COUNTY-YEAR
###############################################
#fe <- read.csv("yearly_fe_match_2.csv")
fe <- read.csv("yearly_fe_final.csv", header=TRUE, sep=",")
fe <- pdata.frame(fe, index=c("fips","year"), row.names=TRUE, drop.index=TRUE)

fe$log_count_vc <- log(fe$vc_count+1)
fe$log_amount_vc <- log(fe$vc_amount+1)
fe$log_patents <- log(fe$num_patents+1)
fe$log_citations <- log(fe$num_citations+1)
fe$log_amount_ks <- log(fe$amount+1)
fe$log_campaigns_ks <- log(fe$num_campaigns+1)
fe$log_successful_ks <- log(fe$num_successful+1)
fe$log_instrument_ks <- log(fe$instrument_successful+1)
fe$log_successful_ks_100 <- log(fe$successful_100+1)
attach(fe)
#stargazer(fe[,1:18], digits=3)  #summary statistics

mean(fe$log_campaigns_ks)
sd(fe$log_campaigns_ks)
min(fe$log_campaigns_ks)
max(fe$log_campaigns_ks)

mean(fe$log_successful_ks)
sd(fe$log_successful_ks)
min(fe$log_successful_ks)
max(fe$log_successful_ks)

mean(fe$log_amount_ks)
sd(fe$log_amount_ks)
min(fe$log_amount_ks)
max(fe$log_amount_ks)

mean(fe$log_count_vc)
sd(fe$log_count_vc)
min(fe$log_count_vc)
max(fe$log_count_vc)

mean(fe$log_amount_vc)
sd(fe$log_amount_vc)
min(fe$log_amount_vc)
max(fe$log_amount_vc)

mean(fe$log_patents)
sd(fe$log_patents)
min(fe$log_patents)
max(fe$log_patents)

mean(fe$log_citations)
sd(fe$log_citations)
min(fe$log_citations)
max(fe$log_citations)

###############################################
# TABLE S3: WITHIN REGION REGRESSIONS
###############################################
m1 <- plm(log_campaigns_ks ~ log_count_vc + log_patents + log_citations, data=fe, effect="twoways", model="within")
m2 <- plm(log_successful_ks ~ log_count_vc + log_patents + log_citations, data=fe, effect="twoways", model="within")
m3 <- plm(log_amount_ks ~ log_amount_vc + log_patents + log_citations, data=fe, effect="twoways", model="within")

texreg(list(m1, m2, m3),booktabs=TRUE,dcolumn=TRUE,digits=3,caption="Fixed effects regression estimates for $n = 3225$ counties observed over $T = 7$ years between 2009 and 2015. The column names indicate the dependent variables, all of which are log transformed. The row names indicate the independent variables, all of which are log transformed.",label="tab:3",center=TRUE,scalebox=1,float.pos="H",use.packages=FALSE)

################################################
# TABLE S4: LAGGED VARIABLE REGRESSIONS 
################################################
m1 <- plm(log_count_vc~ lag(log_successful_ks, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m2 <- plm(log_count_vc~ lag(log_successful_ks, 2) + lag(log_patents,2) + lag(log_citations,2), data=fe, effect="twoways", model="within")
m3 <- plm(log_count_vc~ lag(log_successful_ks, 3) + lag(log_patents,3) + lag(log_citations,3), data=fe, effect="twoways", model="within")
m4 <- plm(log_count_vc~ lag(log_successful_ks, 1), data=fe, effect="twoways", model="within")
m5 <- plm(log_count_vc~ lag(log_successful_ks, 2), data=fe, effect="twoways", model="within")
m6 <- plm(log_count_vc~ lag(log_successful_ks, 3), data=fe, effect="twoways", model="within")

texreg(list(m4, m1, m5, m2, m6, m3),booktabs=TRUE,dcolumn=TRUE,digits=3,caption="Fixed effects regression estimates for $n = 3225$ counties observed over $T = 7$ years between 2009 and 2015. The column names indicate the dependent variables, all of which are log transformed. The row names indicate the independent variables, all of which are log transformed.",label="tab:4",center=TRUE,scalebox=1,float.pos="H",use.packages=FALSE)

################################################
# TABLE S5: TESTS FOR REVERSE CAUSALITY
################################################

m1 <- plm(log_successful_ks ~ lag(log_count_vc, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m2 <- plm(log_successful_ks ~ lag(log_count_vc, 2) + lag(log_patents,2) + lag(log_citations,2), data=fe, effect="twoways", model="within")
m3 <- plm(log_successful_ks ~ lag(log_count_vc, 3) + lag(log_patents,3) + lag(log_citations,3), data=fe, effect="twoways", model="within")
m4 <- plm(log_successful_ks ~ lag(log_count_vc, 1), data=fe, effect="twoways", model="within")
m5 <- plm(log_successful_ks ~ lag(log_count_vc, 2), data=fe, effect="twoways", model="within")
m6 <- plm(log_successful_ks ~ lag(log_count_vc, 3), data=fe, effect="twoways", model="within")

texreg(list(m4, m1, m5, m2, m6, m3),booktabs=TRUE,dcolumn=TRUE,digits=3,caption="Fixed effects regression estimates for $n = 3225$ counties observed over $T = 7$ years between 2009 and 2015. The column names indicate the dependent variables, all of which are log transformed. The row names indicate the independent variables, all of which are log transformed.",label="tab:5",center=TRUE,scalebox=1,float.pos="H",use.packages=FALSE)

################################################
# TABLE S9 & S10: EFFECTS ON PATENTING
################################################

m1 <- plm(log_patents ~ lag(log_campaigns_ks, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m2 <- plm(log_patents ~ lag(log_count_vc, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m3 <- plm(log_patents ~ lag(log_successful_ks, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m4 <- plm(log_patents ~ lag(log_amount_ks, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")
m5 <- plm(log_patents ~ lag(log_amount_vc, 1) + lag(log_patents,1) + lag(log_citations,1), data=fe, effect="twoways", model="within")

texreg(list(m1, m2, m3, m4, m5),booktabs=TRUE,dcolumn=TRUE,digits=3,caption="Fixed effects regression estimates for $n = 3225$ counties observed over $T = 7$ years between 2009 and 2015. The column names indicate the dependent variables, all of which are log transformed. The row names indicate the independent variables, all of which are log transformed.",label="tab:9",center=TRUE,scalebox=1,float.pos="H",use.packages=FALSE)


#################################################################################
# TABLE S8: QUANTILE MODELS OF EFFECTS OF LAGGED KS ON SUBSEQUENT VC INVESTMENT
#################################################################################

require(quantreg)

#fe <- read.csv("yearly_fe_match_2.csv")
fe <- read.csv("yearly_fe_final.csv")

fe$log_count_vc <- log(fe$vc_count+1)
fe$log_amount_vc <- log(fe$vc_amount+1)
fe$log_patents <- log(fe$num_patents+1)
fe$log_citations <- log(fe$num_citations+1)
fe$log_amount_ks <- log(fe$amount+1)
fe$log_campaigns_ks <- log(fe$num_campaigns+1)
fe$log_successful_ks <- log(fe$num_successful+1)
fe$log_instrument_ks <- log(fe$instrument_successful+1)
fe$log_successful_ks_100 <- log(fe$successful_100+1)

fe = subset(fe, log_amount_ks!=0)
fe = subset(fe, log_amount_vc>0)

fe$ks <- fe$log_amount_ks - mean(fe$log_amount_ks)
fit1 <- summary(rq(fe$log_amount_vc ~lag(fe$ks,1),tau=2:98/100))
pdf("AAcoef.pdf", width=8.5,height=5.5)
plot(fit1,mfrow = c(1,2), main=c("Intercept", "Slope"))
dev.off()

pdf("quantplot.pdf",width=6,height=6)
plot(lag(fe$ks,1),fe$log_amount_vc,cex=.25,type="n",xlab="Ln amount KS (t-1)",ylab="Ln amount VC (t)", main="")
points(lag(fe$ks,1),fe$log_amount_vc,cex=.5,col="blue")
abline(rq(fe$log_amount_vc ~lag(fe$ks,1),tau=.5),col="lightgray")
abline(lm(fe$log_amount_vc ~lag(fe$ks,1)),lty=2,col="red", lwd=2)
#abline(v=0, lty=1, col="blue")
taus <- c(.05,.25,.5,.75,.95)
for( i in 1:length(taus)){
	abline(rq(fe$log_amount_vc ~lag(fe$ks,1), tau=taus[i]), col="gray50", lty=i, lwd=1.5)
}
legend(-10,23,c("q=.05","q=.25","q=.50","q=.75","q=.95"),lty=1:length(taus), lwd=1.5, col=c(rep("gray50",2),"red",rep("gray50",2)), bg="white")
dev.off()

m1 <- rq(fe$log_amount_vc ~lag(fe$ks,1) + lag(log(fe$num_patents),1) + lag(log(fe$num_citations),1) , tau=.01)
m2 <- rq(fe$log_amount_vc ~lag(fe$ks,1) + lag(log(fe$num_patents),1) + lag(log(fe$num_citations),1), tau=.25)
m3 <- rq(fe$log_amount_vc ~lag(fe$ks,1) + lag(log(fe$num_patents),1) + lag(log(fe$num_citations),1), tau=.50)
m4 <- rq(fe$log_amount_vc ~lag(fe$ks,1) + lag(log(fe$num_patents),1) + lag(log(fe$num_citations),1), tau=.75)
m5 <- rq(fe$log_amount_vc ~lag(fe$ks,1) + lag(log(fe$num_patents),1) + lag(log(fe$num_citations),1), tau=.99)

require(texreg)
texreg(list(m1, m2, m3, m4, m5), booktabs=TRUE,dcolumn=TRUE,digits=3,caption="Quantile regression estimates. Dependent variable is amount of Venture Capital funding for a region at time t (logged).",label="tab:8",center=TRUE,scalebox=1,float.pos="H",use.packages=FALSE, custom.model.names = c("VC Amount$_{t}$", "VC Amount$_{t}$", "VC Amount$_{t}$", "VC Amount$_{t}$", "VC Amount$_{t}$"), custom.coef.names=c("Intercept", "KS Amount$_{t-1}$", "Patents$_{t-1}$", "Citations$_{t-1}$"), reorder.coef=c(1,2,3,4))

anova(m1,m2,m3,m4,m5)  #analysis of deviance table
